## Evidence Against Partisan Presidential Influence in the Appropriations Process
## Andrew Hall and Maxwell Palmer
## May 8, 2011

## Replication of Berry, Burden and Howell, 2010
## "The President and the Distribution of Federal Spending" APSR 104(4)

## Package for clustered standard errors
#install.packages("Design")
library(Design)

## Load the district-level replication data set provided by Chris Berry
## (header = TRUE and sep = "," are the read.csv defaults)
bbh.data <- read.csv("bbh_district_data.csv")

## Table 1 sample: rows whose redist value is not missing
table1.data <- bbh.data[!is.na(bbh.data$redist), ]

## Replication of BBH Table 1 - Model 1
## Outlays on president with year and panelid factors. The fit keeps x and y
## and is then passed to robcov() with state as the cluster variable.
reg1.1 <- ols(
	high_lnoutlays_cpi ~ president + factor(year) + factor(panelid),
	data = table1.data, x = TRUE, y = TRUE
)
clustered <- robcov(reg1.1, table1.data$state)
## First two coefficients next to the square roots of the matching diagonal
## entries of the robcov() variance matrix
cbind(clustered$coefficients[1:2], sqrt(diag(clustered$var))[1:2])

## Replication of BBH Table 1 - Model 2
## Adds the member- and district-level covariates to Model 1.
reg1.2 <- ols(
	high_lnoutlays_cpi ~ president + majority + any_chair + any_rank +
		leader + mem_approps + mem_waysandmeans + party + freshman +
		cong_close + presmargin_state_abs +
		factor(year) + factor(panelid),
	data = table1.data, x = TRUE, y = TRUE
)
clustered2 <- robcov(reg1.2, table1.data$state)
## First twelve coefficients and their clustered standard errors
cbind(clustered2$coefficients[1:12], sqrt(diag(clustered2$var))[1:12])

## Replication of BBH Table 1 - Model 3
## Model 2 plus the individual committee-membership indicators, which enter
## after the year and panelid factors.
reg1.3 <- ols(
	high_lnoutlays_cpi ~ president + majority + any_chair + any_rank +
		leader + mem_approps + mem_waysandmeans + party + freshman +
		cong_close + presmargin_state_abs +
		factor(year) + factor(panelid) +
		mem_agriculture + mem_armedserv + mem_banking + mem_budget +
		mem_dc + mem_edlabor + mem_govops + mem_energycommerce +
		mem_foreignaffairs + mem_houseadmin + mem_internatresources +
		mem_judiciary + mem_merchmarine + mem_postoffice +
		mem_pubworkstransport + mem_rules + mem_science + mem_smallbus +
		mem_standards + mem_vetaffairs,
	data = table1.data, x = TRUE, y = TRUE
)
clustered3 <- robcov(reg1.3, table1.data$state)
## First twelve coefficients and their clustered standard errors
cbind(clustered3$coefficients[1:12], sqrt(diag(clustered3$var))[1:12])


## Figure 1

## Average Outlays to Districts by Partisan Match and Party
## Collect the five columns the figure uses under short names.
fig.data <- data.frame(
	year = bbh.data$year,
	president = bbh.data$president,
	party = bbh.data$party,
	outlays = bbh.data$high_lnoutlays_cpi,
	panelid = bbh.data$panelid
)
# Drop incomplete rows (5 out of 10418 records removed due to NAs)
fig.data <- na.omit(fig.data)
# Exponentiate the logged outlays to get outlays in $
fig.data$outlays <- exp(fig.data$outlays)

# One row per year from 1984 to 2007; columns 2-5 are filled in by the loop
fig.years <- seq(from = 1984, to = 2007, by = 1)
out <- matrix(
	NA,
	nrow = length(fig.years), ncol = 5,
	dimnames = list(NULL, c("year", "pres match", "pres no match", "dem", "rep"))
)
out[, "year"] <- fig.years

# Average outlays in each year for each category
for (i in seq_len(nrow(out))) {
	in.year <- fig.data$year == out[i, "year"]
	out[i, "pres match"] <- mean(fig.data$outlays[in.year & fig.data$president == 1])
	out[i, "pres no match"] <- mean(fig.data$outlays[in.year & fig.data$president == 0])
	out[i, "dem"] <- mean(fig.data$outlays[in.year & fig.data$party == 0])
	out[i, "rep"] <- mean(fig.data$outlays[in.year & fig.data$party == 1])
}

# Rescale the four mean columns by 10^7 (intended to express them in $m)
# NOTE(review): dollars divided by 10^7 are tens of millions, while the
# figure's axis label reads "($m)" -- confirm the intended unit.
out[, 2:5] <- out[, 2:5] / (10^7)

# Two side-by-side panels written to one PDF: columns 2 and 3 of `out`
# (president match / no match) on the left, columns 4 and 5 (dem / rep) on the
# right. Both panels share the same y-range, axes and axis label.
pdf(file = "party_match_and_party_by_year_v1.pdf", width = 6, height = 3, family = "Helvetica", pointsize = 8)
par(mfrow = c(1, 2))

# Per-panel settings: which columns of `out` to draw, and how to mark them
fig1.panels <- list(
	list(
		cols = c(2, 3),
		pch = c(21, 23),
		colors = c("darkgreen", "darkorange"),
		labels = c("President's Party", "Opposite Party")
	),
	list(
		cols = c(4, 5),
		pch = c(19, 23),
		colors = c("blue", "red"),
		labels = c("Democrats", "Republicans")
	)
)

for (panel in fig1.panels) {
	# First series sets up the plotting region; axes are suppressed here and
	# drawn without tick marks below
	plot(
		x = out[, 1], y = out[, panel$cols[1]],
		col = panel$colors[1], bg = panel$colors[1], pch = panel$pch[1],
		xaxt = "n", yaxt = "n", xlab = "", ylab = "",
		ylim = c(30, 120), cex.lab = .75
	)
	# Second series on the same axes
	points(
		x = out[, 1], y = out[, panel$cols[2]],
		col = panel$colors[2], bg = panel$colors[2], pch = panel$pch[2]
	)
	axis(1, cex.axis = .75, tick = FALSE, line = -1)
	axis(2, cex.axis = .75, tick = FALSE, line = -.5, las = 1)
	mtext("Avg. Spending Per District ($m)", side = 2, line = 1.75, cex = .75)

	legend(
		x = "topleft", legend = panel$labels,
		pch = panel$pch, col = panel$colors, pt.bg = panel$colors,
		cex = .75, bty = "n"
	)
}
dev.off()


# RDD Code


# District data merged with House race results
rd.data <- read.csv("bbh_district_data_merged_with_house_races_1982_2004.csv", header = TRUE)
library(xtable)

# Allocated here; not filled in by the loop below
means <- matrix(nrow = 5, ncol = 2)
lengths <- matrix(nrow = 5, ncol = 2)

# Per-bandwidth containers: plot.x/plot.y hold years 1994-2002,
# plot.x2/plot.y2 hold the years before 1994 and after 2002
plot.x <- list()
plot.y <- list()
plot.x2 <- list()
plot.y2 <- list()

# For each bandwidth, subset the data and add that data to the lists
count <- 1
for (bandwidth in c(0.01, 0.02, 0.03, 0.04, 0.05)) {
	# Keep races inside the bandwidth, with redist present and a winner other
	# than "O". subset() also drops rows where the condition is NA.
	margin.num <- as.numeric(as.character(rd.data$margin))
	in.window <- abs(margin.num) < bandwidth & !is.na(rd.data$redist) & rd.data$winner != "O"
	rd.subset <- subset(rd.data, in.window)

	# Split the subset by period
	mid.years <- rd.subset$year < 2003 & rd.subset$year > 1993
	outer.years <- rd.subset$year < 1994 | rd.subset$year > 2002

	plot.x[[count]] <- as.numeric(as.character(rd.subset$margin[mid.years]))
	plot.y[[count]] <- rd.subset$high_lnoutlays_cpi[mid.years]
	plot.x2[[count]] <- as.numeric(as.character(rd.subset$margin[outer.years]))
	plot.y2[[count]] <- as.numeric(as.character(rd.subset$high_lnoutlays_cpi[outer.years]))
	count <- count + 1
}

# Average the two years of funding that follow each election, while coping
# with cases where only one year of funding data is available.
# The vote margin was merged onto every year the representative is in office,
# so a representative who wins 51 percent of the vote carries the same margin
# in year t+1 and in year t+2. The second year is therefore found by looking
# for repeated margin values.
# NOTE(review): each repeat is averaged with the row immediately before it, so
# this assumes the two rows of a race are adjacent and that distinct races
# never share exactly the same margin -- confirm against the merged data.
# Only plot.x/plot.y are collapsed here; plot.x2/plot.y2 are left untouched.
for (j in seq_along(plot.x)) {
	dups <- which(duplicated(plot.x[[j]]))

	# Nothing to collapse for this bandwidth. Skipping is required: a negative
	# subscript built from an empty index vector, v[-integer(0)], selects zero
	# elements and would discard every observation instead of keeping them all.
	if (length(dups) == 0) {
		next
	}

	# Mean of each repeated row and its predecessor; na.rm = TRUE falls back to
	# the single available year when the other one is missing
	means <- vapply(
		dups,
		function(d) mean(c(plot.y[[j]][d], plot.y[[j]][d - 1]), na.rm = TRUE),
		numeric(1)
	)

	# Store the mean on the first row of each pair, then drop the second row
	plot.y[[j]][dups - 1] <- means
	plot.y[[j]] <- plot.y[[j]][-dups]
	plot.x[[j]] <- plot.x[[j]][-dups]
}

# Clinton years with CI
# One panel per bandwidth from plot.x/plot.y: a grey scatter, a vertical line
# at zero, and a separate loess fit on each side of zero drawn with lines at
# the fit plus and minus two standard errors.

# Fit loess(y ~ x) on one side's rows and draw the fit and its band over x.grid
draw.loess.band <- function(side.data, x.grid) {
	side.fit <- loess(y ~ x, side.data, surface = "direct")
	side.pred <- predict(side.fit, data.frame(x = x.grid), se = TRUE)
	lines(x.grid, side.pred$fit, lwd = 3)
	lines(x.grid, side.pred$fit + 2 * side.pred$se.fit)
	lines(x.grid, side.pred$fit - 2 * side.pred$se.fit)
}

pdf(file = "DemocratRD_WithCI.pdf")
bandwidth <- c(0.01, 0.02, 0.03, 0.04, 0.05)

par(mfrow = c(3, 2))
for (i in seq_along(bandwidth)) {
	main.title <- paste("Bandwidth  = ", bandwidth[i])
	plot(
		plot.x[[i]], plot.y[[i]],
		xaxt = "n", yaxt = "n",
		xlab = "Dem Win Margin", ylab = "Outlays (Ln)",
		pch = 20, col = "grey"
	)
	title(main = main.title)
	axis(side = 1, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
	axis(side = 2, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
	abline(v = 0)

	reg.data <- cbind(plot.y[[i]], plot.x[[i]])
	y <- reg.data[, 1]
	x <- reg.data[, 2]
	rd.frame <- data.frame(y, x)

	# Observations with a margin of exactly zero belong to neither side
	draw.loess.band(rd.frame[which(x < 0), ], seq(-bandwidth[i], 0, .0005))
	draw.loess.band(rd.frame[which(x > 0), ], seq(0, bandwidth[i], .0005))
}
dev.off()

# Clinton Years without CIs
# Plain scatter of plot.x against plot.y for each of the five bandwidths, with
# a vertical line at zero and tick-less axes; no fitted curves.

pdf(file = "DemNoCI.pdf")
par(mfrow = c(3, 2))
panel.titles <- c(
	"Bandwidth=0.01", "Bandwidth=0.02", "Bandwidth=0.03",
	"Bandwidth=0.04", "Bandwidth=0.05"
)
for (i in seq_along(panel.titles)) {
	plot(
		plot.x[[i]], plot.y[[i]],
		xaxt = "n", yaxt = "n",
		xlab = "Dem Win Margin", ylab = "Outlays (Ln)"
	)
	abline(v = 0)
	title(main = panel.titles[i])
	axis(side = 1, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
	axis(side = 2, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
}
dev.off()

# Republican years with CIs
# One panel per bandwidth from plot.x2/plot.y2: a grey scatter, a vertical
# line at zero, and a separate loess fit on each side of zero drawn with lines
# at the fit plus and minus two standard errors.

# Fit loess(y ~ x) on one side's rows and draw the fit and its band over x.grid
draw.loess.band <- function(side.data, x.grid) {
	side.fit <- loess(y ~ x, side.data, surface = "direct")
	side.pred <- predict(side.fit, data.frame(x = x.grid), se = TRUE)
	lines(x.grid, side.pred$fit, lwd = 3)
	lines(x.grid, side.pred$fit + 2 * side.pred$se.fit)
	lines(x.grid, side.pred$fit - 2 * side.pred$se.fit)
}

pdf(file = "RepublicanRD_WithCI.pdf")
bandwidth <- c(0.01, 0.02, 0.03, 0.04, 0.05)
par(mfrow = c(3, 2))
for (i in seq_along(bandwidth)) {
	main.title <- paste("Bandwidth  = ", bandwidth[i])
	plot(
		plot.x2[[i]], plot.y2[[i]],
		xaxt = "n", yaxt = "n",
		xlab = "Dem Win Margin", ylab = "Outlays (Ln)",
		pch = 20, col = "grey"
	)
	title(main = main.title)
	axis(side = 1, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
	axis(side = 2, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
	abline(v = 0)

	reg.data <- cbind(plot.y2[[i]], plot.x2[[i]])
	y <- reg.data[, 1]
	x <- reg.data[, 2]
	rd.frame <- data.frame(y, x)

	# Observations with a margin of exactly zero belong to neither side
	draw.loess.band(rd.frame[which(x < 0), ], seq(-bandwidth[i], 0, .0005))
	draw.loess.band(rd.frame[which(x > 0), ], seq(0, bandwidth[i], .0005))
}
dev.off()

# Republican years without CIs
# Plain scatter of plot.x2 against plot.y2 for each of the five bandwidths,
# with a vertical line at zero and tick-less axes; no fitted curves.
# Every panel reads from plot.x2/plot.y2. Drawing the panels in a loop keeps
# the 0.04 panel on the same series as the others (it previously read
# plot.x[[4]]/plot.y[[4]], i.e. the other period's data).

pdf(file = "RepNoCI.pdf")
par(mfrow = c(3, 2))
panel.titles <- c(
	"Bandwidth=0.01", "Bandwidth=0.02", "Bandwidth=0.03",
	"Bandwidth=0.04", "Bandwidth=0.05"
)
for (i in seq_along(panel.titles)) {
	plot(
		plot.x2[[i]], plot.y2[[i]],
		xaxt = "n", yaxt = "n",
		xlab = "Dem Win Margin", ylab = "Outlays (Ln)"
	)
	abline(v = 0)
	title(main = panel.titles[i])
	axis(side = 1, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
	axis(side = 2, tick = FALSE, cex.axis = .7, line = -.9, las = 1)
}
dev.off()


#RD Regressions

library(apsrtable)

# For each bandwidth: subset the close races, keep one row per margin value,
# and fit a bivariate model and a model with covariates and year factors.
model.list <- list()
bv.model.list <- list()
data.list <- list()
reg.bandwidths <- c(.01, .03, .05)

for (j in seq_along(reg.bandwidths)) {
	bandwidth <- reg.bandwidths[j]
	# Races inside the bandwidth, with redist present and a winner other than
	# "O". subset() also drops rows where the condition is NA.
	rd.subset <- subset(rd.data, abs(as.numeric(as.character(rd.data$margin))) < bandwidth & !is.na(rd.data$redist) & rd.data$winner != 'O')

	# Get rid of second years of funding, just use first year (easier than
	# trying to do mean of the two years). A logical mask is used rather than a
	# negative index: when no margin repeats, x[-integer(0), ] would return
	# zero rows, whereas the mask keeps every row.
	repeated <- duplicated(rd.subset$margin) & !is.na(rd.subset$margin)
	data.list[[j]] <- rd.subset[!repeated, ]

	# Model with covariates and year factors
	model.list[[j]] <- lm(
		high_lnoutlays_cpi ~ president + majority + any_rank + leader +
			mem_approps + mem_waysandmeans + party + freshman +
			presmargin_state_abs + factor(year),
		data = data.list[[j]]
	)
	# Bivariate model
	bv.model.list[[j]] <- lm(high_lnoutlays_cpi ~ president, data = data.list[[j]])
}


# Bivariate RD table, one column per bandwidth
apsrtable(
	bv.model.list[[1]], bv.model.list[[2]], bv.model.list[[3]],
	model.names = c("Bandwidth=.01", "Bandwidth=.03", "Bandwidth=.05")
)


# RD table with covariates. The factor(year) rows are omitted by matching on
# the coefficient names rather than by position: length() of an lm object
# counts its list components, not its coefficients, and the set of year
# dummies can differ between bandwidths.
# NOTE(review): confirm the rendered table has exactly the ten rows named in
# coef.names once the year dummies are omitted.
library(apsrtable)
apsrtable(
	model.list[[1]], model.list[[2]], model.list[[3]],
	coef.names = c(
		"Intercept", "President's Party", "Majority Party",
		"Ranking Committee Member", "Party Leader",
		"Appropriations Committee", "Ways & Means Committee",
		"Republican", "First Term", "President's Vote Margin"
	),
	model.names = c("Bandwidth=.01", "Bandwidth=.03", "Bandwidth=.05"),
	omitcoef = expression(grep("factor(year)", coefnames, fixed = TRUE))
)


# Other appendix tables
# Note that the tables in the paper are generated by hand in LaTeX from this output
# All models below are plain lm() fits on table1.data; each summary is printed.

# Appendix 6.2, Column 1: president only
summary(lm(
	high_lnoutlays_cpi ~ president,
	data = table1.data
))
# Column 2: adds year factors
summary(lm(
	high_lnoutlays_cpi ~ president + as.factor(year),
	data = table1.data
))
# Column 3: adds covariates
summary(lm(
	high_lnoutlays_cpi ~ president + cong_close + majority + any_rank +
		leader + mem_approps + mem_waysandmeans + party + freshman +
		presmargin_state_abs + factor(year),
	data = table1.data
))

# Appendix 6.3, Column 1: president with state factors
summary(lm(
	high_lnoutlays_cpi ~ president + as.factor(state),
	data = table1.data
))
# Column 2: adds year factors
summary(lm(
	high_lnoutlays_cpi ~ president + as.factor(state) + as.factor(year),
	data = table1.data
))
# Column 3: adds covariates
summary(lm(
	high_lnoutlays_cpi ~ president + cong_close + majority + any_rank +
		leader + mem_approps + mem_waysandmeans + party + freshman +
		presmargin_state_abs + factor(year) + as.factor(state),
	data = table1.data
))

